# Load packages
library(tidyverse)
library(janitor)
library(here)
# themes
theme_set(theme_minimal())
library(nycgeo)
library(sf)
library(tidyverse)
nyc_join <- merge(nta_sf,nta_acs_data)
nyc_join <- nyc_join %>%
st_transform(., 4269)
county_list <- nyc_join %>% pull(county_name) %>% unique()
library(tidycensus)
census_api_key("0cc07f06386e317f312adef5e0892b0d002b7254")
census_data <- get_acs(state = "NY",
county = c(county_list),
geography = "tract",
variables = c(medincome = "B19013_001",
total_pop1 = "B01003_001",
below_poverty_100 = "B06012_002",
below_poverty_100to150 = "B06012_003",
median_rent = "B25031_001",
hholds_snap = "B22003_002",
hholds_disability ="B22010_003",
unemployed = "B23025_005",
latinx = "B03002_012",
white = "B03002_003",
black = "B03002_004",
native = "B03002_005",
asian = "B03002_006",
under19_noinsurance = "B27010_017",
age19_34_noinsurance = "B27010_033",
age35_64_noinsurance = "B27010_050",
age65plus_noinsurance = "B27010_066",
naturalized_citizen = "B05001_005",
noncitizen ="B05001_006"),
year = 2019,
output = "wide",
survey = "acs5",
geometry = TRUE) %>%
dplyr::select(-c(NAME, ends_with("M"))) %>%
rename_at(vars(ends_with("E")), .funs = list(~str_sub(., end = -2)))
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|=================== | 27%
|
|==================== | 28%
|
|======================= | 33%
|
|========================= | 36%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|========================================== | 60%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|==================================================================== | 97%
|
|======================================================================| 100%
library(openxlsx)
nta_to_census <- openxlsx::read.xlsx(here("ethnic", "Data", "census_to_nta.xlsx")) %>%
dplyr::select(GEOID, NTACode)
census_nta <- census_data %>%
merge(., nta_to_census) %>%
mutate(uninsured = under19_noinsurance + age19_34_noinsurance + age35_64_noinsurance + age65plus_noinsurance) %>%
dplyr::select(-c(under19_noinsurance, age19_34_noinsurance, age35_64_noinsurance, age65plus_noinsurance)) %>%
mutate(NTACode = as.character(NTACode),
NTACode = ifelse(is.na(NTACode), "NA", NTACode)) %>%
group_by(NTACode) %>%
summarize(geometry = st_union(geometry),
total_pop = sum(total_pop1),
mean_income = mean(medincome, na.rm = TRUE),
below_poverty_line_count = sum(below_poverty_100),
below_poverty_line_and_50_count = sum(below_poverty_100to150),
mean_rent = mean(median_rent, na.rm = TRUE),
unemployment_count = sum(unemployed),
latinx_count = sum(latinx),
white_count = sum(white),
black_count = sum(black),
native_count = sum(native),
asian_count = sum(asian),
naturalized_citizen_count = sum(naturalized_citizen),
noncitizen_count = sum(noncitizen),
uninsured_count = sum(uninsured)) %>%
filter(total_pop != 0) %>%
mutate(below_poverty_line_count = below_poverty_line_count/total_pop,
below_poverty_line_and_50_count = below_poverty_line_and_50_count/total_pop,
unemployment_count = unemployment_count/total_pop,
latinx_count = latinx_count/total_pop,
white_count = white_count/total_pop,
black_count = black_count/total_pop,
native_count = native_count/total_pop,
asian_count = asian_count/total_pop,
naturalized_citizen_count = naturalized_citizen_count/total_pop,
noncitizen_count = noncitizen_count/total_pop,
uninsured_count = uninsured_count/total_pop) %>%
dplyr::rename(nta_id = NTACode)
names(census_nta) <- gsub(names(census_nta), pattern = "_count", replacement = "_percent")
grocery <- read_csv(here("ethnic","Data","grocery.csv")) %>%
drop_na(Georeference) %>%
separate(Georeference, into=c("Point", "longitude", "latitude"), " ") %>%
mutate(latitude = str_remove_all(latitude, "[)]"),
longitude = str_remove_all(longitude, "[()]"),
) %>%
dplyr::select(-c(Point)) %>%
mutate(latitude = as.numeric(latitude),
longitude = as.numeric(longitude)) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4269)
public_schools <- st_read(here("ethnic","Data","schools", "Public_Schools_Points_2011-2012A.shp")) %>%
st_transform(., 4269)
## Reading layer `Public_Schools_Points_2011-2012A' from data source
## `/Users/freddy/Documents/GitHub/454/ethnic/Data/schools/Public_Schools_Points_2011-2012A.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1709 features and 17 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 916395 ymin: 124478.7 xmax: 1085825 ymax: 283328.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
subway_stations <- st_read(here("ethnic","Data","stations", "geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp")) %>%
st_transform(., 4269)
## Reading layer `geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5' from data source
## `/Users/freddy/Documents/GitHub/454/ethnic/Data/stations/geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 473 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.03088 ymin: 40.57603 xmax: -73.7554 ymax: 40.90313
## Geodetic CRS: WGS84(DD)
bus_stations <- st_read(here("ethnic","Data","bus", "bus_stops_nyc_may2020.shp")) %>%
st_transform(., 4269)
## Reading layer `bus_stops_nyc_may2020' from data source
## `/Users/freddy/Documents/GitHub/454/ethnic/Data/bus/bus_stops_nyc_may2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 14663 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 914169.1 ymin: 122626.8 xmax: 1066982 ymax: 271696.8
## Projected CRS: NAD83 / New York Long Island (ftUS)
evictions <- read_csv(here("ethnic","Data","evictions.csv")) %>%
drop_na(Longitude) %>%
drop_na(Latitude) %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4269)
transit_points <- read_csv(here("transit","ridership_points.csv"))%>%
separate(Position, into=c("Point", "longitude", "latitude"), " ") %>%
mutate(latitude = str_remove_all(latitude, "[)]"),
longitude = str_remove_all(longitude, "[()]"),
) %>%
dplyr::select(-c(Point)) %>%
mutate(latitude = as.numeric(latitude),
longitude = as.numeric(longitude)) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4269)
#green_space <- head(read_csv(here("ethnic","Data","parks.csv"))) # coordinate
#access_income <- read_csv(here("ethnic","Data","transit_income.csv"))
#rental_price <- read_csv(here("ethnic","Data","median_rent.csv"))
nyc_schools_join <- st_join(census_nta, public_schools) %>%
group_by(nta_id) %>%
summarize(school_count=n())
nyc_grocery_join <- st_join(census_nta, grocery) %>%
group_by(nta_id) %>%
summarize(store_count=n())
nyc_evictions_join <- st_join(census_nta, evictions)%>%
group_by(nta_id) %>%
summarize(eviction_count=n())
nyc_stop_join <- st_join(census_nta, subway_stations)%>%
group_by(nta_id) %>%
summarize(sub_count=n())
nyc_bus_stop_join <- st_join(census_nta, bus_stations)%>%
group_by(nta_id) %>%
summarize(bus_count=n())
nyc_ridership_join <- st_join(census_nta, transit_points) %>%
group_by(nta_id) %>%
summarize(mean_ridership = mean(`2018Ridership`))
# simple plot shows all locations
library(s2)
#plot locations over map
subway_loc <- ggplot() +
geom_sf(data = census_nta, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
geom_sf(data = subway_stations, color="#3F123C", size=1) +
coord_sf(datum = st_crs(subway_stations)) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Subway Stop Locations \nin NYC")+
theme(#panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
bus_loc <- ggplot() +
geom_sf(data = census_nta, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
geom_sf(data = bus_stations, color="#3F123C", size=.5, alpha=.5) +
coord_sf(datum = st_crs(subway_stations)) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Bus Stop Locations \nin NYC")+
theme(#panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
stops <- nyc_stop_join %>%
ggplot() +
geom_sf(aes(fill = sub_count), color = "#8f98aa") +
scale_fill_gradient(low = "#EBF6FF", high = "#BC24B0",
guide = guide_legend(title = "Number of Subway Stops") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Subway Stop Counts \nin NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
bus_stops <- nyc_bus_stop_join %>%
ggplot() +
geom_sf(aes(fill = bus_count), color = "#8f98aa") +
scale_fill_gradient(low = "#EBF6FF", high = "#BC24B0",
guide = guide_legend(title = "Number of Bus Stops") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Bus Stop Counts \nin NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
ridership <- nyc_ridership_join %>%
ggplot() +
geom_sf(aes(fill = mean_ridership), color = "#8f98aa") +
scale_fill_gradient(low = "#EBF6FF", high = "#BC24B0",
guide = guide_legend(title = "Mean Ridership") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean Subway Turnstile \nRidership in 2018 \nfor NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
library(egg)
ggarrange(subway_loc, bus_loc, stops, bus_stops, ridership, ncol=2)
red <- ggplot(census_nta) +
geom_sf(aes(fill = below_poverty_line_percent), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#E13728", guide = guide_legend(title = "Percent Below \nPoverty Line")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Impoverished Populations")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
yellow <- ggplot(census_nta) +
geom_sf(aes(fill = mean_income), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#F3D24E", guide = guide_legend(title = "Mean Income")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean Income")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
teal <- ggplot(census_nta) +
geom_sf(aes(fill = mean_rent), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#2DBDC7", guide = guide_legend(title = "Dollars")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean Rent")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
purple <- ggplot(nyc_evictions_join) +
geom_sf(aes(fill = eviction_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#7826C0", guide = guide_legend(title = "Number of Evictions")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Evictions")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
orange <- ggplot(census_nta) +
geom_sf(aes(fill = unemployment_percent), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#FC9228", guide = guide_legend(title = "Percent on \nUnemployment")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Unemployment")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
green <- ggplot(nyc_grocery_join) +
geom_sf(aes(fill = store_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#326902", guide = guide_legend(title = "Number of Stores")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Retail Food Stores")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
# yellow_green <- ggplot(census_nta) +
# geom_sf(aes(fill = uninsured_percent), color = "#8f98aa")+
# scale_fill_gradient(low = "#FCF5EE", high = "#939E28", guide = guide_legend(title = "Percent Uninsured")) +
# theme_minimal() +
# theme(panel.grid.major = element_line("transparent"),
# axis.text = element_blank()) +
# ggtitle("Green Space")+
# theme(panel.grid.major = element_line("transparent"),
# plot.title = element_text(size = 13, face = "bold"),
# legend.title = element_text(size = 8),
# legend.text = element_text(size = 8)) +
# guides(shape = guide_legend(override.aes = list(size = 4)),
# color = guide_legend(override.aes = list(size = 4)))
blue <- ggplot(nyc_schools_join) +
geom_sf(aes(fill = school_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#5372C4",
guide = guide_legend(title = "Number of Schools")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Number of Schools")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
pink <- ggplot(census_nta) +
geom_sf(aes(fill = total_pop), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#F450E1", guide = guide_legend(title = "Number of People")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
brown <- ggplot(census_nta) +
geom_sf(aes(fill = uninsured_percent), color = "#8f98aa")+
scale_fill_gradient(low = "#F8E3DD", high = "#6A4D39", guide = guide_legend(title = "Percent of People without Insurance Coverage")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Insurance Coverage")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
ggarrange(red, orange, yellow, green, teal, blue, purple, pink, brown, ncol=2)
Next, we use the same dataset to look at how our population demographic outcomes vary by neighborhood.
white <- ggplot(nyc_join) +
geom_sf(aes(fill = pop_white_pct_est), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#7B435B", guide = guide_legend(title = "Percent White")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("White Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
black <- ggplot(nyc_join) +
geom_sf(aes(fill = pop_black_pct_est), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#F25F5C", guide = guide_legend(title = "Percent Black")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Black Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
asian <- ggplot(nyc_join) +
geom_sf(aes(fill = pop_asian_pct_est), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#717EC3", guide = guide_legend(title = "Percent Asian")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Asian Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
latinx <- ggplot(nyc_join) +
geom_sf(aes(fill = pop_hisp_pct_est), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#FC9A38", guide = guide_legend(title = "Percent Latinx")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Latinx Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 15, face = "bold"),
legend.title = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(shape = guide_legend(override.aes = list(size = 4)),
color = guide_legend(override.aes = list(size = 4)))
ggarrange(white, black, latinx, asian, ncol=2)